{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import itertools\n",
    "import sys\n",
    "if \"../\" not in sys.path:\n",
    "  sys.path.append(\"../\")\n",
    "from lib.utils import read_data_from_file, sign"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pla(X, y, eta=1):\n",
    "    \"\"\"\n",
    "    Perceptron Learning Algorithm\n",
    "    Args:\n",
    "        X: 数据\n",
    "        y: 标签\n",
    "        eta: 步长\n",
    "    \n",
    "    Returns:\n",
    "        w: 特征权重\n",
    "        updates: 更新次数 \n",
    "    \"\"\"\n",
    "    N = len(X)                               # examples size\n",
    "    updates = 0                              \n",
    "    pos = 0                                  # position of last correction mistake \n",
    "    w = np.zeros_like(X[0])                  \n",
    "    \n",
    "    for i in itertools.count():\n",
    "        index = i%N\n",
    "        if sign(w.dot(X[index]))*y[index] < 0:\n",
    "            w = w + X[index]*y[index] * eta      # (try to) correct the mistake\n",
    "            updates += 1\n",
    "            pos = i\n",
    "        if i - pos >= N:\n",
    "            break\n",
    "    return w, updates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pocket_pla(X, y, updates=50):\n",
    "    \"\"\"\n",
    "    Pocket Perceptron Learning Algorithm\n",
    "    Args:\n",
    "        X: 数据\n",
    "        Y: 标签\n",
    "        updates: 更新次数\n",
    "    \n",
    "    Returns:\n",
    "        w_pocket: 最优特征权重\n",
    "        w: 最后更新得到的特征权重\n",
    "    \"\"\"\n",
    "    w = np.zeros_like(X[0])\n",
    "    w_pocket = w\n",
    "    mistakes = np.where(sign(X.dot(w)) != y)[0]         # get index of all mistakes\n",
    "    mis_pocket = len(mistakes)\n",
    "    \n",
    "    for i in range(updates):\n",
    "        mistake = np.random.choice(mistakes)            # pike up one mistake randomly\n",
    "        w = w + X[mistake]*y[mistake]\n",
    "        mistakes = np.where(sign(X.dot(w)) != y)[0]\n",
    "        if mis_pocket > len(mistakes): \n",
    "            w_pocket = w\n",
    "            mis_pocket = len(mistakes)\n",
    "    return w_pocket, w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_err_rate(X, y, w):\n",
    "    err_rate = np.mean(sign(X.dot(w)) != y)\n",
    "    return err_rate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 15-17"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data shape:  (400, 5)\n",
      "[[ 0.97681   0.10723   0.64385   0.29556   1.      ]\n",
      " [ 0.67194   0.2418    0.83075   0.42741   1.      ]\n",
      " [ 0.20619   0.23321   0.81004   0.98691   1.      ]\n",
      " [ 0.51583   0.055814  0.92274   0.75797   1.      ]\n",
      " [ 0.70893   0.10836   0.33951   0.77058   1.      ]\n",
      " [ 0.55743   0.67804   0.061044  0.72689   1.      ]\n",
      " [ 0.15654   0.75584   0.01122   0.42598  -1.      ]\n",
      " [ 0.50462   0.15137   0.33878   0.41881   1.      ]\n",
      " [ 0.22657   0.59272   0.24103   0.46221  -1.      ]\n",
      " [ 0.49174   0.65115   0.24622   0.24796  -1.      ]]\n"
     ]
    }
   ],
   "source": [
    "data = read_data_from_file('hw1_15_train.dat')\n",
    "print('data shape: ', data.shape)\n",
    "print(data[:10])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "15. Implement a version of PLA by visiting examples in the naive cycle using the order of examples in the data set. Run the algorithm on the data set. What is the number of updates before the algorithm halts?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(array([-3.       ,  3.0841436, -1.583081 ,  2.391305 ,  4.5287635]), 45)\n"
     ]
    }
   ],
   "source": [
    "def problem15(data):\n",
    "    Y = data[:,-1]\n",
    "    X = np.concatenate((np.ones((data.shape[0],1)), data[:,:-1]), axis=1)\n",
    "    return pla(X,Y)\n",
    "print(problem15(data))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "16. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm. Run the algorithm on the data set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average number of updates before the algorithm halts?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def problem16(data):\n",
    "    times = 2000\n",
    "    all_updates = []\n",
    "    for i in range(times):\n",
    "        np.random.shuffle(data)\n",
    "        Y = data[:,-1]\n",
    "        X = np.concatenate((np.ones((data.shape[0],1)), data[:,:-1]), axis=1)\n",
    "        all_updates.append(pla(X,Y)[1])\n",
    "    return np.mean(np.array(all_updates))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40.1795\n"
     ]
    }
   ],
   "source": [
    "data = read_data_from_file('hw1_15_train.dat')\n",
    "np.random.seed(0)\n",
    "print(problem16(data))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "17. Implement a version of PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm, while changing the update rule to be\n",
    "$${{\\bf{w}}_{t+1}←{\\bf{w}}_{t}+\\eta y_{n(t)}{\\bf{x}}_{n(t)}}$$\n",
    "with η=0.5. Note that your PLA in the previous Question corresponds to η=1. Please repeat your experiment for 20002000 times, each with a different random seed. What is the average number of updates before the algorithm halts?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def problem17(data, eta=.5):\n",
    "    times = 2000\n",
    "    all_updates = []\n",
    "    for i in range(times):\n",
    "        np.random.shuffle(data)\n",
    "        Y = data[:,-1]\n",
    "        X = np.concatenate((np.ones((data.shape[0],1)), data[:,:-1]), axis=1)\n",
    "        all_updates.append(pla(X,Y, eta)[1])\n",
    "    return np.mean(np.array(all_updates))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40.1795\n"
     ]
    }
   ],
   "source": [
    "data = read_data_from_file('hw1_15_train.dat')\n",
    "np.random.seed(0)\n",
    "print(problem17(data, eta=.5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40.1795\n"
     ]
    }
   ],
   "source": [
    "data = read_data_from_file('hw1_15_train.dat')\n",
    "np.random.seed(0)\n",
    "print(problem17(data, eta=.8))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* Note: 更新次数与$\\eta$无关. 这个是显然的"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 18-20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data_train shape:  (500, 5)\n",
      "data_test shape:  (500, 5)\n"
     ]
    }
   ],
   "source": [
    "data_train = read_data_from_file('hw1_18_train.dat') \n",
    "print('data_train shape: ', data_train.shape)\n",
    "data_test = read_data_from_file('hw1_18_test.dat')\n",
    "print('data_test shape: ', data_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "18. Next, we play with the pocket algorithm. Modify your PLA in Question 16 to visit examples purely randomly, and then add the \"pocket\" steps to the algorithm. We will use<br/>https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_math/hw1_18_train.dat<br/>as the training data set ${\\cal {D}}$, and<br/>https://www.csie.ntu.edu.tw/~htlin/mooc/datasets/mlfound_math/hw1_18_test.dat<br/>as the test set for \"verifying'' the gg returned by your algorithm (see lecture 4 about verifying). The sets are of the same format as the previous one. Run the pocket algorithm with a total of 50 updates on ${\\cal {D}}$ , and verify the performance of ${\\bf {w}}_{POCKET}$ using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def problem18(data_train, data_test):\n",
    "    times = 2000\n",
    "    err_rates = []\n",
    "    Y = data_train[:,-1]\n",
    "    X = np.concatenate((np.ones((data_train.shape[0],1)), data_train[:,:-1]), axis=1)\n",
    "    y_test = data_test[:,-1]\n",
    "    X_test = np.concatenate((np.ones((data_test.shape[0],1)), data_test[:,:-1]), axis=1)\n",
    "\n",
    "    for i in range(times):\n",
    "        w_pocket, w = pocket_pla(X,Y)\n",
    "        err_rates.append(get_err_rate(X_test, y_test, w_pocket))\n",
    "    return err_rates, np.mean(np.array(err_rates))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.13280200000000003\n"
     ]
    }
   ],
   "source": [
    "np.random.seed(0)\n",
    "print(problem18(data_train, data_test)[1]) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "19. Modify your algorithm in Question 18 to return ${\\bf{w}}_{50}$ (the PLA vector after 50 updates) instead of $\\hat{{\\bf{w}}}$ (the pocket vector) after 50 updates.Run the modified algorithm on ${\\cal {D}}$, and verify the performance using the test set.Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def problem19(data_train, data_test):\n",
    "    times = 2000\n",
    "    err_rates = []\n",
    "    Y = data_train[:,-1]\n",
    "    X = np.concatenate((np.ones((data_train.shape[0],1)), data_train[:,:-1]), axis=1)\n",
    "    y_test = data_test[:,-1]\n",
    "    X_test = np.concatenate((np.ones((data_test.shape[0],1)), data_test[:,:-1]), axis=1)\n",
    "\n",
    "    for i in range(times):\n",
    "        _, w = pocket_pla(X,Y)\n",
    "        err_rates.append(get_err_rate(X_test, y_test, w))\n",
    "    return err_rates, np.mean(np.array(err_rates))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.35390999999999995\n"
     ]
    }
   ],
   "source": [
    "np.random.seed(0)\n",
    "print(problem19(data_train, data_test)[1]) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "20. Modify your algorithm in Question 18 to run for 100 updates instead of 50, and verify the performance of ${\\bf {w}}_{POCKET}$ using the test set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average error rate on the test set?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def problem20(data_train, data_test):\n",
    "    times = 2000\n",
    "    err_rates = []\n",
    "    Y = data_train[:,-1]\n",
    "    X = np.concatenate((np.ones((data_train.shape[0],1)), data_train[:,:-1]), axis=1)\n",
    "    y_test = data_test[:,-1]\n",
    "    X_test = np.concatenate((np.ones((data_test.shape[0],1)), data_test[:,:-1]), axis=1)\n",
    "\n",
    "    for i in range(times):\n",
    "        w_pocket, _ = pocket_pla(X,Y, updates=100)\n",
    "        err_rates.append(get_err_rate(X_test, y_test, w_pocket))\n",
    "    return err_rates, np.mean(np.array(err_rates))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.11548500000000002\n"
     ]
    }
   ],
   "source": [
    "np.random.seed(0)\n",
    "print(problem20(data_train, data_test)[1]) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tips"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 1. 注意numpy中常见的函数mean/sum可以直接用于处理bool类型的数组(True=1,False=0).这个是很有用的技巧"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ True  True  True  True False  True False  True  True False]\n",
      "7\n",
      "0.7\n"
     ]
    }
   ],
   "source": [
    "np.random.seed(0)\n",
    "a = np.random.uniform(size=10) > .5\n",
    "print(a)\n",
    "print(a.sum())\n",
    "print(a.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 2. np.frompyfunc可用于将一般的func变成可直接处理array的函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "?np.frompyfunc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "165px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
